start_time <- Sys.time()
# Import functions
root = "./"
source(file.path(root,"general_functions.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] "F"
##
## $resolution
## [1] "0.001"
##
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.001__perplexity-30__nCores-4"
##
## $nCores
## [1] "4"
##
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
library(Seurat)
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
library(enrichR) #BiocManager::install("enrichR")
library(monocle) # BiocManager::install("monocle")
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
#
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
allGenes <- F
test.use <- "wilcox"
}else{
allGenes <- T
# MAST not install on Minerva...
test.use <- "wilcox"}
allGenes## [1] TRUE
# session_info() #Doesn't work on Minerva?CDS_to_Seurat <- function(cds, export_PC=F, export_UMAP=F, export_tSNE=F){
# sum(colnames(mDAT) != colnames( mDAT@reducedDimS))
## Manually save reduced dimensions
if(export_PC==T){
mDAT$PC1 <- mDAT@normalized_data_projection[,1]
mDAT$PC2 <- mDAT@normalized_data_projection[,2]
mDAT$PC3 <- mDAT@normalized_data_projection[,3]
}
if(export_UMAP==T){
mDAT$UMAP1 <- mDAT@reducedDimA[1,]
mDAT$UMAP2 <- mDAT@reducedDimA[2,]
mDAT$UMAP3 <- mDAT@reducedDimA[3,]
}
DAT <- exportCDS(mDAT, export_to = "Seurat", export_all = T)
DAT@scale.data <- DAT@data #Data was already scaled in Monocle
# DAT <- Seurat::AddMetaData(DAT, pData(mDAT)[c("garnett_cluster","cell_type","cluster_ext_type")])
return(DAT)
}
DAT <- CDS_to_Seurat(mDAT, export_PC = T, export_UMAP = T)## Scaling data matrix
head(DAT@meta.data) ## nGene nUMI orig.ident singlet.or.not.binary HTO
## GTCATTTTCCGCATCT 2035 6729 RAJ_13357 1 NYUMD0011
## ATAAGAGAGGAGTTGC 1914 6718 RAJ_13357 1 BID0076
## CATTATCCATGGTCTA 1971 6156 RAJ_13357 1 MSMD0067
## CTTAGGAGTGGCGAAT 1893 6392 RAJ_13357 1 NYUMD0011
## CATCAGAAGCACCGCT 1967 6110 RAJ_13357 1 MSMD0067
## GCGGGTTAGTAGCCGA 1868 5977 RAJ_13357 1 NYUMD0011
## ID percent.mito res.2 res.1 res.0.6 res.0.3
## GTCATTTTCCGCATCT MSMD0207 0.04191439 0 1 0 0
## ATAAGAGAGGAGTTGC BIMD0076 0.04362066 0 1 0 0
## CATTATCCATGGTCTA NYUMD0015 0.03086921 0 1 0 0
## CTTAGGAGTGGCGAAT MSMD0207 0.03613892 0 1 0 0
## CATCAGAAGCACCGCT NYUMD0015 0.04437531 19 10 8 0
## GCGGGTTAGTAGCCGA MSMD0207 0.03514056 6 2 0 0
## barcode dx mut Ethnicity Gender Age
## GTCATTTTCCGCATCT GTCATTTTCCGCATCT PD PD White M 71
## ATAAGAGAGGAGTTGC ATAAGAGAGGAGTTGC PD GBA White M 47
## CATTATCCATGGTCTA CATTATCCATGGTCTA control control White M 51
## CTTAGGAGTGGCGAAT CTTAGGAGTGGCGAAT PD PD White M 71
## CATCAGAAGCACCGCT CATCAGAAGCACCGCT control control White M 51
## GCGGGTTAGTAGCCGA GCGGGTTAGTAGCCGA PD PD White M 71
## Size_Factor louvain_component Cluster garnett_cluster
## GTCATTTTCCGCATCT 5.117674 1 1 12
## ATAAGAGAGGAGTTGC 5.109379 1 1 12
## CATTATCCATGGTCTA 4.661479 1 1 2
## CTTAGGAGTGGCGAAT 4.844786 1 1 2
## CATCAGAAGCACCGCT 4.610054 1 1 2
## GCGGGTTAGTAGCCGA 4.508861 1 1 5
## cell_type cluster_ext_type cluster_ext_type_filt PC1
## GTCATTTTCCGCATCT Monocytes Monocytes Monocytes 21.74363
## ATAAGAGAGGAGTTGC Monocytes Monocytes Monocytes 20.51278
## CATTATCCATGGTCTA Monocytes Monocytes Monocytes 20.73024
## CTTAGGAGTGGCGAAT Monocytes Monocytes Monocytes 21.48480
## CATCAGAAGCACCGCT Monocytes Monocytes Monocytes 18.94471
## GCGGGTTAGTAGCCGA Monocytes Monocytes Monocytes 17.77910
## PC2 PC3 UMAP1 UMAP2 UMAP3
## GTCATTTTCCGCATCT 0.72274985 -2.813874 1.365752 1.513548 1.161469
## ATAAGAGAGGAGTTGC 1.02458096 -3.651205 1.354717 1.488010 1.172568
## CATTATCCATGGTCTA -0.03820978 -2.694958 1.351607 1.519749 1.158323
## CTTAGGAGTGGCGAAT -0.42949109 -1.943783 1.369097 1.527969 1.158835
## CATCAGAAGCACCGCT 4.19589928 -4.857566 1.305207 1.470326 1.143383
## GCGGGTTAGTAGCCGA 2.61114660 -7.713996 1.303913 1.455874 1.130939
gene_gene_plot <- function(DAT, markerList, colorby, title="", plot=T, legend=T, filterZeros=T){
markerData <- DAT@data[row.names(DAT@data) %in% markerList,] %>% t() %>%
as.matrix() %>% data.table(keep.rownames = T, key = "rn")
markerData[markerData$markerList[1]==0,] <- NA
markerData[markerData$markerList[2]==0,] <- NA
colnames(markerData)[2:length(markerData)] <- c("Gene1", "Gene2")
## Efficiently merge using data.table
# dt1 <- data.table(markerData, keep.rownames = "cell_type", key = "cell_type") %>% copy()
dt2 <- data.table(DAT@meta.data, keep.rownames = "Cell", key = "Cell") %>% copy()
row.names(dt2) <- dt2$Cell
markerDT <- markerData[dt2]
if(filterZeros==T){markerDT <- subset(markerDT, Gene1!=0 & Gene2!=0)}
p <- ggplot(data = markerDT, aes(x=Gene1, y=Gene2 )) +
stat_density_2d(aes(fill = ..level..),
geom = "polygon", colour="purple", bins = 10, size=.1) +
geom_point(aes(color=as.factor(eval(parse(text=colorby)))), shape=16, stroke=0, size=2, alpha=.5) +
guides(colour = guide_legend(title=colorby, override.aes = list(alpha = 1))) +
scale_color_manual(values = pretty_colors(mDAT, var = colorby)) +
labs(x=markerList[1], y=markerList[2], title=title) +
geom_smooth(method="lm")
if(legend==F){p <- p + theme(legend.position = "none")}
if(plot==T){print(p)}
return(p)
} p <- gene_gene_plot(DAT, c("CD14", "FCGR3A"), colorby="cluster_ext_type_filt",
title="Monocyte Subtype Markers")d <- DAT@data@Dimnames[[1]]
HLA_genes <- d[startsWith(d, "HLA-DR")]
HLA_1 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[1]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
HLA_2 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[2]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
HLA_3 <- gene_gene_plot(DAT, c("LRRK2", HLA_genes[3]), colorby="cluster_ext_type_filt",
title="", plot = F, legend=F)
plot_grid(HLA_1, HLA_2, HLA_3)Need to get tSNE from Monocle into Seurat somehow.
FeaturePlot(DAT,features.plot = c("CD14","FCGR3A"),
cols.use = c("grey","red","blue","purple"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT,features.plot = c("LRRK2", "GBA"),
cols.use = c("grey","purple","cyan","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F) FeaturePlot(DAT, features.plot = c("MS4A4A","MS4A6A"),
cols.use = c("grey","red","green","blue"),
dark.theme = T, nCol = 2, overlay = T, no.legend = F)Remove filters to get all DEGs.
clustDAT <- SubsetData(DAT, subset.name = "Cluster", accept.value = c(1,2), do.scale = F)
# subDAT <- SubsetData(DAT, cells.use = 1:500)
DEGs_monocytes <- runDGE(DAT, meta_var = "Cluster", group1 = 1, group2 = 2,
allGenes = allGenes, test.use = test.use)# DEGs_monocytes <- FindMarkers(DAT, min.pct = 0.25, only.pos = F,
# ident.1 = 1, ident.2 = 2, test.use = "wilcox"
# )
# DEGs_monocytes <- read.csv("Results/MonocyteSubtype.markers.csv", row.names = 1)
createDT(DEGs_monocytes, caption="DEGs between cluster 1 (CD16- monocytes) and cluster 2 (CD16+ monocytes")## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
write.csv(DEGs_monocytes, file.path(resultsPath, "MonocyteSubtype.markers.csv"), quote = F, row.names = T)identify_unique_markers <-function(DAT, clusterA, clusterB, allGenes=F, test.use="wilcox"){
DAT <- SetIdent(DAT, ident.use = DAT@meta.data$Cluster)
if(allGenes==F){
clustA.markers <- FindMarkers(DAT, ident.1=clusterA, min.pct = 0.25,
only.pos = F, test.use = test.use)
clustB.markers <- FindMarkers(DAT, ident.1=clusterB, min.pct = 0.25,
only.pos = F, test.use = test.use)
}else{
clustA.markers <- FindMarkers(DAT, ident.1=clusterA,
only.pos = F, test.use = test.use,
logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
min.cells.gene = 1, min.diff.pct = -Inf)
clustB.markers <- FindMarkers(DAT, ident.1=clusterB,
only.pos = F, test.use = test.use,
logfc.threshold = 0, min.pct = 0, min.cells.group = 1,
min.cells.gene = 1, min.diff.pct = -Inf)
}
clustA.uniqueMarkers <- clustA.markers[!(row.names(clustA.markers) %in% row.names(clustB.markers)),] %>%
subset(p_val_adj<=0.05)
clustB.uniqueMarkers <- clustB.markers[!(row.names(clustB.markers) %in% row.names(clustA.markers)),] %>%
subset(p_val_adj<=0.05)
difference <- abs( length(row.names(clustA.uniqueMarkers)) - length(row.names(clustB.uniqueMarkers) ) )
uniqueMarkers <- data.frame(Cluster0_markers=c(row.names(clustA.uniqueMarkers), rep("",difference) ),
Cluster1_markers=row.names(clustB.uniqueMarkers))
write.csv(uniqueMarkers,
file.path(resultsPath,"unique_cluster_markers.csv"),
quote = F, row.names = F)
createDT(uniqueMarkers, "Unique/Mutually Exclusive Markers of Cluster 1 and Cluster 2")
return(uniqueMarkers)
}
uniqueMarkers <- identify_unique_markers(clustDAT, clusterA = 1, clusterB = 2,
allGenes=F, test.use = test.use)# clustDAT@meta.data$dx %>% unique()
# FDR <- 0.05/dim(DEGs)[1]
# subset(DEGs, p_val<FDR)
DEGs <- runDGE(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control",
allGenes = F, test.use = test.use) # clustDAT@meta.data$dx %>% unique()
DEGs <- runDGE(clustDAT, meta_var = "mut", group1 = "PD", group2 = "GBA",
allGenes = F, test.use = test.use)## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", group2 = "control",
clusterList = c(1,2), allGenes = F, test.use = test.use)## Error in DGE_within_clusters(clustDAT, meta_var = "dx", group1 = "PD", : unused argument (test.use = test.use)
DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA",
clusterList = c(1,2), allGenes = F, test.use = test.use)## Error in DGE_within_clusters(DAT, meta_var = "mut", group1 = "PD", group2 = "GBA", : unused argument (test.use = test.use)
enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
"GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018",
"Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
# createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")
geneList <- data.frame(Gene=row.names(DEGs_monocytes),
Weight=scales::rescale(length(DEGs_monocytes$p_val_adj):1))
results <- enrichr(genes = geneList, databases = enrichr_dbs ) Uploading data to Enrichr…
## Error in curl::curl_fetch_memory(url, handle = handle): Couldn't connect to server
for (db in enrichr_dbs){
cat('\n')
cat("##",db,"\n")
# res <- subset(results[[db]], Adjusted.P.value<=0.05)
createDT_html(results[[db]], paste("Enrichr Results:",db))
cat('\n')
} ## Error in crosstalk::is.SharedData(data): object 'results' not found
save.image(file.path(resultsPath, "scRNAseq_results.RData"))
end_time <- Sys.time()
end_time - start_time## Time difference of 1.022966 hours